1 Objective

The goal of this session is to get a clean macula densa dataset to work with.

2 Load Packages

options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320
## [1] 79456894976

3 Load Dataset

SO1 <- LoadSeuratRds(here("outputs", "so_merge_raw.rds"))

head(SO1@meta.data)

4 How many of these are actually MD cells

SO.markers <- FindAllMarkers(SO1, only.pos = TRUE)
SO.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
SO.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 5) %>%
    ungroup() -> top10
DoHeatmap(SO1, features = top10$gene) + NoLegend()

markers.to.plot1 <- c("Lrp2",         # PT
                      "Slc5a12",      # PT-S1
                      "Slc13a3",      # PT-S2
                      "Slc16a9",      # PT-S3
                      "Havcr1",       # Injured PT
                      "Epha7",        # dTL
                      "Cryab",        # dTL
                      "Cdh13",        # dTL1
                      "Slc14a2",      # dTL2
                      "Slc12a1",      # TAL
                      "Umod",         # TAL, DCT1
                      "Egf",          # TAL, DCT1,
                      "Cldn10",       # TAL
                      "Cldn16",       # TAL
                      "Nos1",         # MD
                      "Slc12a3",      # DCT
                      "Pvalb",        # DCT1
                      "Slc8a1",       # DCT2, CNT
                      "Aqp2",         # PC
                      "Slc4a1",       # IC-A
                      "Slc26a4",      # IC-B
                      "Nphs1",        # Podo
                      "Ncam1",        # PEC
                      "Flt1",         # Endo
                      "Emcn",         # Glom Endo
                      "Kdr",          # Capillary Endo
                      "Pdgfrb",       # Perivascular
                      "Pdgfra",       # Fib
                      "Piezo2",       # Mesangial
                      "Acta2",        # Mural
                      "Ptprc",        # Immune
                      "Cd74",         # Macrophage
                      "Skap1",        # B/T Cells 
                      "Upk1b",        # Uro
                      "Top2a"         # Proliferation
)
                      
DotPlot(SO1,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

DimPlot(SO1)

5 Subset out non MD cells.

SO2 <- subset(SO1, idents = c("4", "5", "6", "7"), invert = TRUE)

DimPlot(SO2)

SO2 <- SCTransform(SO2) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:30) %>%
    FindClusters() %>%
    RunUMAP(dims = 1:30)
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 22835 by 11529
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 3 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 308 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 22835 genes
## Computing corrected count matrix for 22835 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 25.13912 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1 
## Positive:  Umod, Egf, Tmem52b, Sult1d1, Klk1, Sostdc1, Wfdc2, Fabp3, Cox6c, Prdx5 
##     Ly6a, Foxq1, Wfdc15b, Atp1a1, Slc25a5, Krt7, Cox4i1, Cox5b, Ckb, Atp5g1 
##     Cox8a, Cldn19, Ndufa4, Atp1b1, Ggt1, Atp5b, Chchd10, Gm47708, Cox6b1, Uqcrq 
## Negative:  Pappa2, Malat1, Gm37376, Mir6236, Zfand5, Robo2, Neat1, Aard, Wwc2, Pde10a 
##     Itga4, Nadk2, Col4a4, Sdc4, Nos1, Col4a3, Sgms2, S100g, Ptgs2, Etnk1 
##     Ramp3, Irx1, Itprid2, Hnrnpa2b1, Srrm2, Tmem158, Ranbp3l, Bmp3, Camk2d, Gm24447 
## PC_ 2 
## Positive:  Ubb, Gm22133, Fth1, Mt1, Ldhb, Ftl1, Hspa1a, H3f3b, Fos, Eif1 
##     Junb, Cd63, Car15, Hspa1b, Prdx1, Rps24, Jun, Fxyd2, Mpc2, Cd9 
##     Actb, Mt2, Jund, Rpl7, Rps5, Mgst1, Btg2, Rps27a, Tmem213, Itm2b 
## Negative:  Malat1, Mir6236, Egf, Gm37376, CT010467.1, Umod, mt-Nd5, mt-Nd4l, Tmem52b, Nme7 
##     mt-Rnr1, Neat1, Slc12a1, Gm24447, mt-Nd6, mt-Nd2, mt-Co1, Atp1b1, Gm28438, Kcnq1ot1 
##     mt-Nd4, Lars2, Etnk1, mt-Rnr2, Wnk1, Dst, Foxq1, Sult1d1, Slc5a3, Atrx 
## PC_ 3 
## Positive:  Car15, Pappa2, Gm22133, Ldhb, Fth1, Mgst1, Itm2b, Cd63, Mpc2, Cd9 
##     Aard, Tmem213, Sfrp1, Ftl1, Fxyd2, Tmem59, Bsg, Ramp3, Prdx1, Clu 
##     Mcub, Tmem158, Irx1, Ppp1r1a, Sdc4, Ndfip1, Tmbim6, Tmem176a, Rpl9, Cldn10 
## Negative:  Fos, Junb, Jun, Egr1, Btg2, Hspa1b, Hspa1a, Atf3, Zfp36, Ier2 
##     Fosb, Klf6, Socs3, Klf2, Jund, Dusp1, Dnajb1, Rhob, Gadd45g, Malat1 
##     Gadd45b, Tsc22d1, Klf4, Ubc, H1f2, Cebpd, Ppp1r15a, Actb, Gm37376, Gm42793 
## PC_ 4 
## Positive:  Mt1, Gm8420, mt-Nd4l, mt-Co1, Gm8885, Rps21, Gm28438, mt-Rnr1, mt-Rnr2, Gm28437 
##     Apoe, Mt2, Rpl37, mt-Nd5, Rpl38, Gm28037, Gm11808, Gm3511, Rpl37a, Gm28661 
##     Rps28, Gm29216, Gm11353, mt-Cytb, Gm10925, mt-Nd2, Aebp1, Atp5md, Gm24447, Rpl39 
## Negative:  Pappa2, Umod, Egf, Aard, Tmem52b, Sult1d1, Tmem158, Car15, Mcub, Ptgs2 
##     Wwc2, Hsp90b1, Foxq1, Fth1, Dctd, Wnk1, Cd9, Gm22133, Iyd, Ptprz1 
##     Klk1, Sfrp1, Rpl15-ps2, Cdkn1c, Calr, Sec14l1, Atp1b1, Robo2, Clu, Tmsb4x 
## PC_ 5 
## Positive:  S100g, Gm8420, Tpt1, Rps21, Rps12, Pappa2, Rpl41, Rpl37, Gm8885, Rpl38 
##     Rpl37a, Mir6236, Rpl23, Rpl39, Gm11808, Gm6136, Actb, Rps27, Rpl23a, Gm3511 
##     Tmsb10, Rps26, Rpl30, Ppia, Tmem52b, Rpl5-ps1, Gm7206, Gm11353, Rpl34, Rps27a 
## Negative:  Hspa1a, Klk1, Hspa1b, Gm22133, mt-Rnr2, Car15, Atp1a1, Gm13340, Gm28437, Gm10925 
##     CT010467.1, mt-Cytb, Itm2b, mt-Nd4, Rpl15-ps2, Cd63, Clcnkb, Fth1, Gm28661, Ftl1 
##     Jun, Sfrp1, Gm29216, mt-Rnr1, Rpl13, Ptger3, Gpx6, Hspa8, Cldn10, Atp4a
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11529
## Number of edges: 377768
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7894
## Number of communities: 15
## Elapsed time: 1 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:54:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:54:27 Read 11529 rows and found 30 numeric columns
## 12:54:27 Using Annoy for neighbor search, n_neighbors = 30
## 12:54:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:54:27 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmpm8cAgM/file169b9172a16f2
## 12:54:27 Searching Annoy index using 1 thread, search_k = 3000
## 12:54:29 Annoy recall = 100%
## 12:54:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:54:30 Initializing from normalized Laplacian + noise (using RSpectra)
## 12:54:30 Commencing optimization for 200 epochs, with 504322 positive edges
## 12:54:30 Using rng type: pcg
## 12:54:32 Optimization finished
DimPlot(SO2)

SO2 <- subset(SO2, features = grep("^mt-|^Rp|^Gm", rownames(SO2), invert = TRUE, value = TRUE))

SO.markers2 <- FindAllMarkers(SO2, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
SO.markers2 %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
SO.markers2 %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 5) %>%
    ungroup() -> top10
DoHeatmap(SO2, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(SO2, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the SCT assay:
## Wnt10b, Meg3, Pvalb, Ifi47, Speer4e, Fli1, Wnk3, Slit3, Glycam1, Gjb3, Dcdc5,
## Il3ra

DotPlot(SO2,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()
## Warning: The following requested variables were not found: Slc5a12, Slc13a3,
## Havcr1, Kdr, Pdgfrb, Pdgfra, Piezo2, Upk1b

5.1 Simplifying into a few types

markers.to.plot2 <- c("Pappa2",
                      "Ptger3",
                      "Aard",
                      "Ptgs2",
                      "Egf",
                      "Umod",
                      "Cldn19",
                      "Foxq1",
                      "Sult1d1",
                      "Fos",
                      "Junb",
                      "Hspb1",
                      "Cxcl10",
                      "Isg15",
                      "Cryab"
)

DimPlot(SO2)

DotPlot(SO2,
features = markers.to.plot2,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

SO4 <- subset(SO2, idents = c("14"), invert = TRUE)


SO4@meta.data <- SO4@meta.data %>%
  mutate(subclass_MD = dplyr::case_when(
    seurat_clusters %in% c(0,1,2,3,4,6,10) ~ "type_1",
    seurat_clusters %in% c(5,7,12) ~ "type_2a",
    seurat_clusters == 8 ~ "type_2b",
    seurat_clusters == 9 ~ "type_3",
    seurat_clusters == 11 ~ "type_4",
    seurat_clusters == 13 ~ "type_2c",
  ))


SO4@meta.data$subclass_MD <- factor(
  SO4@meta.data$subclass_MD, 
  levels = c("type_1", "type_2a", "type_2b", "type_2c", "type_3","type_4")
)


SO4@meta.data <- SO4@meta.data %>%
  mutate(subclass2_MD = dplyr::case_when(
    seurat_clusters %in% c(0,1,2,3,4,6,10) ~ "type_1",
    seurat_clusters %in% c(5,7,8,12,13) ~ "type_2",
    seurat_clusters == 9 ~ "type_3",
    seurat_clusters == 11 ~ "type_4"
  ))

SO4@meta.data$subclass2_MD <- factor(
  SO4@meta.data$subclass2_MD, 
  levels = c("type_1", "type_2","type_3","type_4")
)

Idents(SO4) <- SO4@meta.data$subclass_MD

DimPlot(object = SO4, reduction = "umap", group.by = "subclass_MD", label = TRUE)

DimPlot(object = SO4, reduction = "umap", group.by = "subclass2_MD", label = TRUE)

DimPlot(SO4,group.by = "subclass_MD",split.by = "treatment")

6 Saving Seurat Object

save(SO4, file = here("jk_code", "JK_cleanMD.rds"))